**************************
***	RUN USING STATA 16 ***
**************************
* Purpose: analyse variation in content and cognitive domains covered by WASSCE items each year
* Last Updated: 06 May 2021
	
*********************************
*** MATHS domain analysis  ******
*********************************

	// Start from the domain mapping: load sheet "maths_Obj_Sub" of the workbook in the
	// folder held by the global macro $input. The first row supplies the variable
	// names, which are forced to lower case; any data in memory is replaced.
	// A forward slash is used as the path separator because Stata accepts it on every
	// platform, whereas a backslash is only a directory separator on Windows.
	import excel "$input/math domains.xlsx", sheet("maths_Obj_Sub") firstrow case(lower) clear
	
	// Test type: turn the string variable into a labelled numeric one. encode numbers
	// the distinct strings 1, 2, ... in alphabetical order; later code refers to the
	// resulting codes test==1 and test==2. The string original is then dropped.
	encode testtype, generate(test)
	drop testtype
	
	// Assign each item to one of seven harmonised content domains.
	// The raw contentdomain strings contain case, spelling and trailing-space variants,
	// as well as entries flagged with question marks; every variant is listed explicitly.
	// With string arguments inlist() accepts at most 10 arguments in total, so the ten
	// plane-geometry variants cannot be combined into a single call.
	gen d = ""
	replace d = "algebraic processes" if inlist(contentdomain, "Algebra", "Algebra ???", "algebra", "algebra???",  "functions????", "matrices???", "Algebra ")
	replace d = "number and numeration" if inlist(contentdomain, "Number", "number", "binary operations")
	replace d = "mensuration" if inlist(contentdomain, "mensnsuration", "mensuration", "mensuration ")
	replace d = "plane geometry" if inlist(contentdomain, "plane geometry", "geometry", "transformation", "vectors", "vectors ", "vectors???", "vectors/algebra")
	replace d = "plane geometry" if inlist(contentdomain, "construction???", "consruction???", "Construction???")
	replace d = "coordinate geometry" if inlist(contentdomain, "coordinate geometry", "mapping?????", "integration???")
	replace d = "trigonometry" if inlist(contentdomain, "trigonometry", "trigonometry ?")
	replace d = "statistics and probability" if inlist(contentdomain, "probability", "statistics", "statistics/prob", "Statistics")
	
	// Make the numeric domain variable (1-7). Items whose contentdomain matched none
	// of the lists above keep d == "" and therefore a missing domain.
	gen domain = .
	replace domain = 1 if d == "number and numeration"
	replace domain = 2 if d == "algebraic processes"
	replace domain = 3 if d == "mensuration"
	replace domain = 4 if d == "plane geometry"
	replace domain = 5 if d == "coordinate geometry"
	replace domain = 6 if d == "trigonometry"
	replace domain = 7 if d == "statistics and probability"
	
	// Report unmapped items instead of leaving them silently missing.
	quietly count if missing(domain)
	if r(N) > 0 {
		display as text "note: " r(N) " item(s) matched no content-domain category; domain is missing for them"
		tabulate contentdomain if missing(domain), missing
	}
	
	// Short display names for the numeric codes.
	label define domain 1 "Number" 2 "Algebra" 3 "Mensuration" 4 "Plane Geo." 5 "Coordinate Geo." 6 "Trigonometry" 7 "Stats / Prob."
	label values domain domain
	
	// Cognitive domain.
	// Blank level entries are filled from levelofthinking; entries that are still
	// blank, or that read "too high", "bonus/error" or "???", are pooled under "f".
	replace level = levelofthinking if level==""
	replace level = "f" if inlist(level, "too high", "bonus/error", "???", "")
	
	// encode numbers the distinct strings 1, 2, ... in alphabetical order, so the
	// hand-written label below is only correct if the pooled "f" category lands on
	// code 6. NOTE(review): this presumes the raw codes are exactly five values that
	// sort before "f" in the order Recall ... Evaluation - confirm against the sheet.
	encode level, generate(cognitive)
	
	// Stop here rather than attach misaligned labels: an absent category or a
	// case/spacing variant in level would shift the codes assigned by encode.
	assert cognitive == 6 if level == "f"
	
	label define level 1 "Recall" 2 "Comprehension" 3 "Application" 4 "Analysis" 5 "Evaluation" 6 "N/A"
	label values cognitive level
	
	
**********************
******* GRAPHS *******
**********************	
	// Only these four variables are used by the two figures.
	keep year test domain cognitive
		
*** prep for content domains
	// Work on a temporary copy; the item-level data return at -restore-.
	preserve
	// Each item counts 1, or 4 when test==2 (this scales item values); the counts
	// are then summed within year x test x domain.
	gen x = cond(test==2, 4, 1)
	collapse (sum) x , by(year test domain)
	
*** plot for content domains
	// Plot 1 is the test==2 series (grey markers) and plot 2 the test==1 series
	// (hollow black markers); marker size is weighted by x. legend(order(2 1)) lists
	// plot 2 first. The y axis shows the value labels attached to domain.
	// Export paths use a forward slash, which Stata accepts on every platform.
	twoway ///
	(scatter domain year if test==2 [w=x], ///
	mfcolor(gs9) mlcolor(gs9) mlwidth(medthick) msize(*2.2) /*this deals with the weighting within tests*/ ) ///
	(scatter domain year if test==1 [w=x], ///
	mfcolor(white) mlcolor(black) mlwidth(thin)) ///
	, legend(order(2 1) label(2 "Multiple-Choice Items") label(1 "Constructed-Responses Items") cols(2) position(6) size(10pt)) ///
	xtick(2010(1)2020, notick) ///
	xlabel(2011(1)2019, labsize(10pt)) ///
	ylabel(1(1)7, valuelabel labsize(10pt)) ///
	xtitle("") ytitle("") ///
	plotregion(margin(b+4 l-7 r-7)) ///
	xsize(4.5) ysize(3)
	graph export "$graph/1a.png", replace
	graph export "$graph/1a.eps", replace
	restore

*** prep for cognitive domains
	// Work on a temporary copy; the item-level data return at -restore-.
	preserve
	// Each item counts 1, or 4 when test==2 (this scales item values); the counts
	// are then summed within year x test x cognitive level.
	gen x = cond(test==2, 4, 1)
	collapse (sum) x , by(year test cognitive)
	
*** offset marker location for selected locations to show where marks are equal
	// yrc is the plotted x position: the year itself, shifted by +0.05 for test==2
	// and by -0.05 for test==1 in the listed years, so the two markers sit side by
	// side instead of on top of each other.
	gen yrc = year
	replace yrc = year+0.05 if inlist(year,2014,2015,2016) & test==2 & cognitive==2 	// for comprehension
	replace yrc = year-0.05 if inlist(year,2014,2015,2016) & test==1 & cognitive==2 	// for comprehension
	replace yrc = year+0.05 if inlist(year,2011,2014) & test==2 & cognitive==1 			// for recall
	replace yrc = year-0.05 if inlist(year,2011,2014) & test==1 & cognitive==1 			// for recall
	
*** plot for cognitive domains
	// Plot 1 is the test==2 series (grey markers) and plot 2 the test==1 series
	// (hollow black markers); both leave out code 6 and missing values via cognitive<6.
	// Plot 3 draws the single test==2, cognitive==2, year==2017 marker again with a
	// larger size multiplier - NOTE(review): presumably a manual size correction for
	// that point; confirm. Plot 3 is left out of the legend by order(2 1).
	// Export paths use a forward slash, which Stata accepts on every platform.
	twoway ///
	(scatter cognitive yrc if cognitive<6 & test==2 [w=x], ///
	mfcolor(gs9) mlcolor(gs9) mlwidth(medthick) msize(*2.2)/*scale within tests*/) ///
	(scatter cognitive yrc if cognitive<6 & test==1 [w=x], ///
	mfcolor(white) mlcolor(black) mlwidth(thin)) ///
	(scatter cognitive yrc if cognitive==2 & test==2 & year==2017 [w=x], ///
	mfcolor(gs9) mlcolor(gs9) mlwidth(medthick) msize(*3)/*scale within tests*/) ///
	, legend(order(2 1) label(2 "Multiple-Choice Items") label(1 "Constructed-Responses Items") cols(2) position(6) size(10pt)) ///
	xtick(2010(1)2020, notick) ///
	xlabel(2011(1)2019, labsize(10pt)) ///
	ylabel(1 "Recall" 2 "Comprehend" 3 "Apply" 4 "Analyze" 5 "Evaluate", labsize(10pt)) ///
	xtitle("") ytitle("") ///
	plotregion(margin(b+4 l-7 r-7)) ///
	graphregion(margin(l+6)) ///
	xsize(4.5) ysize(3)
	graph export "$graph/1b.png", replace
	graph export "$graph/1b.eps", replace
	restore